
* Start with no data in memory and switch off the -more- pause so that output is not interrupted
clear
set more off

/*********************************************************************************
Name: hw_effort.do

Data In: [Data/Original/observation_days_centers.dta,
		  Data/Intermediate/hw_roster_by_center.dta,
		  Data/Intermediate/hw_controls.dta,
		  Data/Intermediate/adherence.dta,
		  Data/Intermediate/baseline_opasha_visit.dta,
		  Data/Original/spot_checks.dta,
		  Data/Original/hw_roster.dta,
		  Data/Original/hw_surveys.dta,
		  Data/Intermediate/hw_roster_by_hw.dta,
		  Data/Original/patient_surveys_attempts.dta,
		  Data/Original/patient_surveys.dta,
		  Data/Intermediate/patient_controls.dta,
		  Data/Intermediate/verified_patients.dta,
		  Data/Original/salary_slips.dta]

Data Out: [Data/Intermediate/Table4_1_sh.dta,
		   Data/Intermediate/Table4_2_sh.dta,
		   Data/Intermediate/Table8_PanelA_2_sh.dta,
		   Data/Intermediate/Table8_PanelB_2_sh.dta,
		   Data/Intermediate/Table8_PanelB_1_sh.dta,
		   Data/Intermediate/Table8_PanelA_1_sh.dta]

Results Out: [Results/Paper/Table4_1.out,
			  Results/Appendix/TableC2_1.log,
			  Results/Appendix/TableD2_1.log,
			  Results/Appendix/TableC2_2.log,
			  Results/Appendix/TableD2_2.log,
			  Results/Appendix/TableE2_1.out,
			  Results/Appendix/TableA6.out,
			  Results/Paper/Table8_PanelC.out,
			  Results/Appendix/FigureA2.png,
			  Results/Paper/Table4_2.out,
			  Results/Appendix/TableC2_3.log,
			  Results/Appendix/TableD2_3.log,
			  Results/Appendix/TableE2_2.out,
			  Results/Paper/Table8_PanelA_2.out,
			  Results/Appendix/TableA8.out,
			  Results/Paper/Table8_PanelB.out,
			  Results/Appendix/TableB3.out,
			  Results/Paper/Table8_PanelA_1.out]

Purpose of do-file: Estimating the treatment's impact on health worker effort and assessing the sustainability of the intervention

Organization: PART-1: Estimating the treatment's impact on health worker effort, using observation days
			  PART-2: Analyzing changes in technology presence over time, using observation days
			  PART-3: Estimating the treatment's impact on health worker effort, using random spot checks
			  PART-4: Estimating the treatment's impact on health worker effort and satisfaction, using health worker surveys
			  PART-5: Estimating the treatment's impact on health worker effort and patient satisfaction, using patient surveys
			  PART-6: Estimating the treatment's impact on health worker compensation
*********************************************************************************/

* Setting path directory
* Every relative path used below (Data/..., Results/...) is resolved from the folder held in the global DIRECTORY.
* NOTE(review): DIRECTORY is not defined in this part of the file -- presumably it is set by the caller; confirm before running this do-file on its own.
cd "${DIRECTORY}"


****************************************
*** PART-1 *** Estimating the treatment's impact on health worker effort, using observation days
****************************************

** Loading the observation-day data and attaching center and health worker information

use "Data/Original/observation_days_centers.dta", clear

* Center roster: every observation row must find its center and every center must appear in the observations
merge m:1 UID_Center using "Data/Intermediate/hw_roster_by_center.dta", assert(match) nogenerate

* Health worker controls: every observation row must find its health worker; health workers without any observation row are discarded
merge m:1 Unique_ID using "Data/Intermediate/hw_controls.dta", assert(match using) keep(match) nogenerate

* First set of health worker controls: the nine characteristics followed by their nine "_dum" companions, in the same order
local control_stems b_age b_jobs_bef_oa male gen_caste hindu twelveandbelow tertiary hhd_size ownhouse
local control_levels
local control_dummies
foreach stem of local control_stems {
	local control_levels `control_levels' `stem'_couns
	local control_dummies `control_dummies' `stem'_dum_couns
}
global hw_controls "`control_levels' `control_dummies'"

* Second (alternative) set of health worker controls
global hw_controls_2 "b_jobs_bef_oa_couns b_age_couns gen_caste_couns caste_st_couns religion_oth_couns same_nbhd_couns gen_caste_dum_couns caste_st_dum_couns religion_oth_dum_couns b_jobs_bef_oa_dum_couns same_nbhd_dum_couns b_age_dum_couns"

* Flagging rows dated before the experiment start date
gen before_exp = visit_date < expstartdate

* Number of rows per monitoring instance, and position of each row within its instance (rows ordered by evening_form_yn)
bys unique_mon_instance (evening_form_yn): gen N_halfday_inst = _N
bys unique_mon_instance (evening_form_yn): gen n_halfday_inst = _n


** Building clock-time variables from the raw hour / minute components

local clock_stubs center_open_time cntr_closing_time monitor_depart_time couns_starttime couns_deprt_time

foreach stub of local clock_stubs {

	* Hour 24 is read as hour 0; any other hour outside 0-23 becomes missing
	replace `stub'_hh = 0 if `stub'_hh == 24
	replace `stub'_hh = . if !inrange(`stub'_hh,0,23)
	* Minutes outside 0-59 are set to 0 (inrange() is also false for missing values, so missing minutes become 0 too)
	replace `stub'_mm = 0 if !inrange(`stub'_mm,0,59)

	* Number of missing values in each cleaned component
	quietly count if missing(`stub'_hh)
	local n_miss_hh = r(N)
	quietly count if missing(`stub'_mm)
	local n_miss_mm = r(N)

	* Time of day, stored as milliseconds and displayed as HH:MM
	gen `stub' = hms(`stub'_hh,`stub'_mm,0)
	format `stub' %tcHH:MM

	* The combined variable must have exactly as many missing values as the component with the most missing values
	quietly count if missing(`stub')
	local n_miss_clock = r(N)
	assert `n_miss_clock' == max(`n_miss_hh',`n_miss_mm')

	* Keeping minute, hour and combined time next to each other in the dataset
	order `stub'_hh `stub',after(`stub'_mm)

}
	

** Creating center-level variables

* Center open indicator
// Answers 1 ("Yes") and 3 ("Somebody's House") both count as open; the indicator stays missing when the question is unanswered...
gen center_open = cond(missing(center_open_yn), ., inlist(center_open_yn,1,3))
// ...unless an opening or closing hour was recorded, in which case the center is treated as open
replace center_open = 1 if !(missing(cntr_closing_time_hh) & missing(center_open_time_hh))
bys unique_mon_instance: egen center_open_day = max(center_open)

* Minutes the center stayed open (clock variables are in milliseconds)
local ms_per_minute = 1000*60
gen center_open_duration = (cntr_closing_time - center_open_time)/`ms_per_minute'
// When the closing time is unavailable, the monitor's departure time stands in for it
replace center_open_duration = (monitor_depart_time - center_open_time)/`ms_per_minute' if missing(center_open_duration)
// Negative durations are discarded; a closed center has duration 0; unknown open status gives a missing duration
replace center_open_duration = . if center_open_duration < 0
replace center_open_duration = 0 if center_open == 0
replace center_open_duration = . if center_open == .
bys unique_mon_instance: egen center_open_duration_day = total(center_open_duration), missing

* Visit by Operation ASHA
tab opasha_visit_yn, m
// Code 2 is recoded to 0, and so are unanswered rows for which the center's open status is known
replace opasha_visit_yn = 0 if opasha_visit_yn == 2
replace opasha_visit_yn = 0 if opasha_visit_yn == . & center_open ~= .
bys unique_mon_instance: egen opasha_visit_day = max(opasha_visit_yn)


** Creating health-worker-level variables

* Health worker presence
// Code 2 is recoded to 0; as for the center-open indicator, a recorded arrival or departure hour implies presence
replace couns_present_yn = 0 if couns_present_yn == 2
replace couns_present_yn = 1 if !(missing(couns_deprt_time_hh) & missing(couns_starttime_hh))
bys unique_mon_instance: egen couns_present = max(couns_present_yn)

* Clock times and durations of health worker breaks (breaks 1 to 4)

forvalues brk = 1/4 {

	foreach edge in arr_time dprt_time {

		local hh couns_brk_`edge'_hh`brk'
		local mm couns_brk_`edge'_mm`brk'
		local clock couns_brk_`edge'_`brk'

		* Hour 24 is read as hour 0, other hours outside 0-23 become missing, minutes outside 0-59 (or missing) are set to 0
		replace `hh' = 0 if `hh' == 24
		replace `hh' = . if !inrange(`hh',0,23)
		replace `mm' = 0 if !inrange(`mm',0,59)

		quietly count if missing(`hh')
		local n_miss_hh = r(N)
		quietly count if missing(`mm')
		local n_miss_mm = r(N)

		* Time of day in milliseconds, displayed as HH:MM
		gen `clock' = hms(`hh',`mm',0)
		format `clock' %tcHH:MM

		* The combined variable must have exactly as many missing values as the component with the most missing values
		quietly count if missing(`clock')
		local n_miss_clock = r(N)
		assert `n_miss_clock' == max(`n_miss_hh',`n_miss_mm')

		order `hh' `clock',after(`mm')

	}

	* Break length in minutes (arrival back minus departure); zero or negative lengths are set to missing
	gen couns_brk_duration_`brk' = (couns_brk_arr_time_`brk' - couns_brk_dprt_time_`brk')/(1000*60)
	replace couns_brk_duration_`brk' = . if couns_brk_duration_`brk' <= 0
	order couns_brk_duration_`brk',after(couns_brk_dprt_time_`brk')

}

* Total break time; missing only when every break duration is missing
egen couns_brk_duration_total = rowtotal(couns_brk_duration_?), missing

* Minutes spent by the health worker in the center (row level)
gen couns_time_total = (couns_deprt_time - couns_starttime)/(1000*60)
// The health worker's departure time is often missing while the arrival time is recorded. When both departure times are available, the monitor usually leaves after the health worker,
// so a missing health worker departure most likely means the monitor left first. The monitor's departure time is therefore used as a proxy, which yields a lower bound on time in the center.
replace couns_time_total = (monitor_depart_time - couns_starttime)/(1000*60) if missing(couns_time_total)
replace couns_time_total = . if couns_time_total < 0
replace couns_time_total = 0 if couns_present_yn == 0
replace couns_time_total = . if couns_present_yn == .

* Minutes spent by the health worker in the center (monitoring-instance level)
tempvar longest_row
bys unique_mon_instance: egen couns_total_time_day = total(couns_time_total), missing
bys unique_mon_instance: egen `longest_row' = max(couns_time_total)
// Paper center forms (observation_type 3) repeat the health worker data on each row, so summing would double count: the largest row value is used instead.
// Taking the maximum also ensures that, when the monitor's departure time was used as a proxy and two different ones exist, the latest one is retained.
replace couns_total_time_day = `longest_row' if observation_type == 3
drop `longest_row'
replace couns_total_time_day = 0 if couns_present == 0
replace couns_total_time_day = . if couns_present == .

* Break time net of breaks taken for genuine work reasons
// Genuine reasons: 5 = home visit / visiting a patient; 6 = went to dmc for work-related purposes; 14 = some hospital work; 16 = meeting or receiving another Op Asha counselor / program manager
// The subtraction terms are assembled in a loop but evaluated in a single expression; a break only enters if its reason is genuine and its duration is known.
local genuine_terms
forvalues brk = 1/4 {
	local genuine_terms "`genuine_terms' - cond(inlist(couns_brk_reason`brk',5,6,14,16) & !missing(couns_brk_duration_`brk'), couns_brk_duration_`brk', 0)"
}
gen couns_brk_duration_total_slack = couns_brk_duration_total `genuine_terms'
// Unknown break time is set to 0 whenever presence is known: breaks are a minor component of effective presence and we want as few missing values as possible.
// This includes absent health workers, because net time in the center must be defined for them as well.
replace couns_brk_duration_total_slack = 0 if !missing(couns_present) & missing(couns_brk_duration_total_slack)
replace couns_brk_duration_total_slack = 0 if couns_present == 0
replace couns_brk_duration_total_slack = . if couns_present == .

* Minutes spent by the health worker in the center, net of breaks
// NOTE(review): the break variable is computed row by row whereas couns_total_time_day is aggregated over the monitoring instance -- confirm that subtracting the row-level breaks is intended.
gen couns_time_net = couns_total_time_day - couns_brk_duration_total_slack
replace couns_time_net = . if couns_time_net < 0
replace couns_time_net = 0 if couns_present == 0
replace couns_time_net = . if couns_present == .


** Creating variables measuring technology presence

* Biometric equipment received (code 2 is recoded to 0), aggregated over the monitoring instance
replace biom = 0 if biom == 2
bys unique_mon_instance: egen biom_partday = max(biom)

* Device-level indicators: present / working at some point during the monitoring instance
rename usb_present usb_present_yn

foreach device in fpr laptop usb {

	foreach item in present working {

		* Rows of the instance answering "yes" (1), and rows with no answer
		tempvar n_yes n_unknown
		bys unique_mon_instance: egen `n_yes' = total(`device'_`item'_yn == 1)
		bys unique_mon_instance: egen `n_unknown' = total(missing(`device'_`item'_yn))

		if "`item'" == "present" {
			// Presence is defined when biometric equipment is known to be absent, or known to be there with at least one answered presence question
			local known_if "(biom_partday == 0) | (biom_partday == 1 & `n_unknown' < N_halfday_inst)"
		}
		else {
			// Working status is defined when the device is known to be absent, or known to be present with at least one answered working question
			local known_if "(`device'_present_partday == 0) | (`device'_present_partday == 1 & `n_unknown' < N_halfday_inst)"
		}

		bys unique_mon_instance: gen `device'_`item'_partday = (`n_yes' >= 1) if `known_if'

		drop `n_yes' `n_unknown'

	}

}

* Composite measures: share of the three devices present / working

// "Other" devices are left out because they are not measured in mobile centers.
// rowmean() ignores missing values in both numerator and denominator: if one device is missing and the other two are "Yes", the share is 1, not 2/3.
egen tech_number_present_main = rowmean(fpr_present_partday laptop_present_partday usb_present_partday)
egen tech_number_working_main = rowmean(fpr_working_partday laptop_working_partday usb_working_partday)


** Attaching monitoring-instance aggregates of the patient records (mean adherence and share of patient_row == 2 records)

preserve

	* One row per monitoring instance, expanded to one row per patient record of that instance
	keep unique_mon_instance
	duplicates drop
	merge 1:m unique_mon_instance using "Data/Intermediate/adherence.dta", keep(master match) nogenerate

	* Mean of adherence_2 over records with patient_row 1 or 3, spread to every row of the instance
	tempvar adh_main
	bys unique_mon_instance: egen `adh_main' = mean(adherence_2) if inlist(patient_row,1,3)
	bys unique_mon_instance: egen mean_adherence_2 = max(`adh_main')
	drop `adh_main'

	* Records with patient_row == 2 relative to records with patient_row 1 or 3 (missing when the denominator is zero)
	tempvar n_main n_secondary
	bys unique_mon_instance: egen `n_main' = total(inlist(patient_row,1,3))
	bys unique_mon_instance: egen `n_secondary' = total(patient_row == 2)
	gen frac_patients_secondary = `n_secondary'/`n_main'

	* Back to one row per monitoring instance (the retained variables are constant within instance)
	duplicates drop unique_mon_instance, force
	keep unique_mon_instance frac_patients_secondary mean_adherence_2

	tempfile pat_aggregate_day
	save `pat_aggregate_day'

restore

* Every observation row must receive its aggregates, and every aggregate must be used
merge m:1 unique_mon_instance using `pat_aggregate_day', assert(match) nogenerate


** Control group only: testing whether the fraction of Operation ASHA visits differs between baseline and the experiment period (comparison between Table 1, Panel C and Table 4, column 7)

preserve

	* Experiment-period data: first row of each monitoring instance, flagged as non-baseline
	keep if before_exp == 0 & n_halfday_inst == 1
	keep uid_cluster opasha_visit_day final_stratum_id1-final_stratum_id13 treatment
	gen baseline = 0

	* Stacking the baseline records (presumably carrying baseline == 1 -- confirm in the appended file)
	append using "Data/Intermediate/baseline_opasha_visit.dta"

	* Difference between the two periods within the control group, standard errors clustered by uid_cluster
	ivreg2 opasha_visit_day baseline if treatment == 0, small cluster(uid_cluster)

restore


** Estimating the treatment's impact on health worker effort, using observation days: Table 4, columns 1, 3, 5, 6, and 7, and Table A8, column 1

* Sidak-Holm correction: storing the unadjusted p-value of the treatment coefficient for each outcome

preserve

	* Estimation samples: first row of each monitoring instance after the experiment started; center outcomes are further limited to observation types 3 and 4
	local sh_all "n_halfday_inst == 1 & before_exp == 0"
	local sh_center "inlist(observation_type, 3,4) & `sh_all'"

	foreach outcome in center_open_day center_open_duration_day opasha_visit_day couns_present couns_time_net {

		local sh_sample "`sh_all'"
		if inlist("`outcome'", "center_open_day", "center_open_duration_day") {
			local sh_sample "`sh_center'"
		}

		ivreg2 `outcome' treatment $hw_controls final_stratum_id1-final_stratum_id13 if `sh_sample', small cl(uid_cluster)
		test treatment
		gen `outcome'_p2 = `r(p)'

	}

	* Keeping a single row holding the p-values
	keep center_open_day_p* center_open_duration_day_p* opasha_visit_day_p* couns_present_p* couns_time_net_p*
	gen id = 99
	keep in 1

	* Turning that row into one observation per outcome: "variable" holds the outcome stub and "pval_2" its p-value
	reshape long center_open_day_p center_open_duration_day_p opasha_visit_day_p couns_present_p couns_time_net_p, i(id) j(test)
	drop id
	xpose, clear varname
	drop in 1
	rename v1 pval_2
	rename _varname variable

	save "Data/Intermediate/Table4_1_sh.dta", replace

restore

* Main regressions, one column per outcome; the first column creates the output file and the others are appended to it

local t4_all "n_halfday_inst == 1 & before_exp == 0"
local t4_mode replace

foreach outcome in center_open_day couns_present center_open_duration_day couns_time_net opasha_visit_day frac_patients_secondary {

	* Center outcomes and the patient share are limited to observation types 3 and 4
	local t4_sample "`t4_all'"
	if inlist("`outcome'", "center_open_day", "center_open_duration_day", "frac_patients_secondary") {
		local t4_sample "inlist(observation_type, 3,4) & `t4_all'"
	}

	* Outcomes measured in minutes are reported with 1 decimal, all others with 3
	local t4_dec = cond(inlist("`outcome'", "center_open_duration_day", "couns_time_net"), 1, 3)

	* Control-group mean on the estimation sample
	su `outcome' if `t4_sample' & treatment == 0
	local mean_control=r(mean)

	ivreg2 `outcome' treatment $hw_controls final_stratum_id1-final_stratum_id13 if `t4_sample', small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Paper/Table4_1.out", `t4_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Health worker Controls, Yes) adec(`t4_dec') dec(`t4_dec') keep(treatment)

	local t4_mode append

}

* Wild cluster bootstrap or pairs cluster bootstrap: Table C2, columns 1, 3, 5, 6, and 7, and Table D2, columns 1, 3, 5, 6, and 7
// Each results log is opened under its own name. An unnamed -log using- stops with an error when a log is already open
// (for instance one opened by a calling do-file, or one left open by an interrupted run); named logs can coexist with it.
// The -capture log close- before each -log using- only closes a leftover log of that same name.

preserve

	* Estimation sample: first row of each monitoring instance, after the experiment started
	keep if n_halfday_inst == 1 & before_exp == 0

	* The first set of health worker controls is renamed by stripping the trailing "couns" (presumably to shorten the names for the bootstrap commands -- confirm)
	local short_controls b_age_ b_jobs_bef_oa_ male_ gen_caste_ hindu_ twelveandbelow_ tertiary_ hhd_size_ ownhouse_ b_age_dum_ b_jobs_bef_oa_dum_ male_dum_ gen_caste_dum_ hindu_dum_ twelveandbelow_dum_ tertiary_dum_ hhd_size_dum_ ownhouse_dum_
	foreach stub of local short_controls {
		rename `stub'couns `stub'
	}
	global hw_controls_b = "`short_controls'"

	* Outcomes estimated on all observation types

	* wild cluster bootstrap
	capture log close TableC2_1
	log using "Results/Appendix/TableC2_1.log", replace name(TableC2_1)
	foreach var in couns_present couns_time_net opasha_visit_day {
		wildbootstrap regress `var' treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) reps(5000) rseed(13915183)
	}
	log close TableC2_1

	* pairs cluster bootstrap
	capture log close TableD2_1
	log using "Results/Appendix/TableD2_1.log", replace name(TableD2_1)
	foreach var in couns_present couns_time_net opasha_visit_day {
		clustse regress `var' treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
	}
	log close TableD2_1

	* Center outcomes are estimated on observation types 3 and 4 only
	keep if inlist(observation_type, 3,4)

	* wild cluster bootstrap
	capture log close TableC2_2
	log using "Results/Appendix/TableC2_2.log", replace name(TableC2_2)
	foreach var in center_open_day center_open_duration_day {
		wildbootstrap regress `var' treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) reps(5000) rseed(13915183)
	}
	log close TableC2_2

	* pairs cluster bootstrap
	capture log close TableD2_2
	log using "Results/Appendix/TableD2_2.log", replace name(TableD2_2)
	foreach var in center_open_day center_open_duration_day {
		clustse regress `var' treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
	}
	log close TableD2_2

restore

* With second set of controls: Table E2, columns 1, 3, 5, 6, and 7
// Same specifications as the main table, with $hw_controls_2 in place of $hw_controls; the first column creates the output file and the others are appended to it

local e2_all "n_halfday_inst == 1 & before_exp == 0"
local e2_mode replace

foreach outcome in center_open_day couns_present center_open_duration_day couns_time_net opasha_visit_day {

	* Center outcomes are limited to observation types 3 and 4
	local e2_sample "`e2_all'"
	if inlist("`outcome'", "center_open_day", "center_open_duration_day") {
		local e2_sample "inlist(observation_type, 3,4) & `e2_all'"
	}

	* Outcomes measured in minutes are reported with 1 decimal, all others with 3
	local e2_dec = cond(inlist("`outcome'", "center_open_duration_day", "couns_time_net"), 1, 3)

	* Control-group mean on the estimation sample
	su `outcome' if `e2_sample' & treatment == 0
	local mean_control=r(mean)

	ivreg2 `outcome' treatment $hw_controls_2 final_stratum_id1-final_stratum_id13 if `e2_sample', small cl(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableE2_1.out", `e2_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Health worker Controls, Yes) adec(`e2_dec') dec(`e2_dec') keep(treatment)

	local e2_mode append

}


** Assessing the responsiveness of Operation ASHA's visits to patients' treatment adherence and health workers' presence at the center: Table A6

* Creating lagged variables for patient adherence as well as health worker presence and time spent in center
// Lags are read from the 1 to 4 preceding rows of the same center. Because rows are sorted by n_halfday_inst before visit_date,
// the rows preceding a first-row observation (n_halfday_inst == 1) of a center are the earlier first-row observations of that center.
// unique_mon_instance is added as a last sort key: Stata orders tied observations randomly, so without a tie-breaker two
// monitoring instances of the same center on the same date (if any exist) would make the lags differ from run to run.

sort UID_Center n_halfday_inst visit_date unique_mon_instance

forvalues j = 1/4 {

	gen mean_adherence_2_lag`j' = mean_adherence_2[_n-`j'] if n_halfday_inst == 1 & UID_Center[_n] == UID_Center[_n-`j']

}

* Average over the available lags, and its interaction with treatment
egen mean_adherence_2_lag_all = rowmean(mean_adherence_2_lag?)
gen mean_adh2_lag_trt_intrction = mean_adherence_2_lag_all * treatment

sort UID_Center n_halfday_inst visit_date unique_mon_instance

forvalues j = 1/4 {

	gen couns_present_lag`j' = couns_present[_n-`j'] if n_halfday_inst == 1 & UID_Center[_n] == UID_Center[_n-`j']
	* Net time in the center is converted from minutes to hours
	gen couns_time_net_lag`j' = couns_time_net[_n-`j'] / 60 if n_halfday_inst == 1 & UID_Center[_n] == UID_Center[_n-`j']

}

egen couns_present_lag_all = rowmean(couns_present_lag?)
gen couns_present_lag_trt = couns_present_lag_all*treatment
egen couns_time_net_lag_all = rowmean(couns_time_net_lag?)
gen couns_time_lag_trt = couns_time_net_lag_all * treatment

* Regressions
// For each lagged measure (paired with its treatment interaction): one column without and one column with health worker controls.
// The very first column creates the output file; every other column is appended to it.

local a6_sample "n_halfday_inst == 1 & before_exp == 0"
local a6_mode replace

foreach pair in "mean_adherence_2_lag_all mean_adh2_lag_trt_intrction" "couns_present_lag_all couns_present_lag_trt" "couns_time_net_lag_all couns_time_lag_trt" {

	local lagvar : word 1 of `pair'
	local lagxtrt : word 2 of `pair'

	ivreg2 opasha_visit_day `lagvar' treatment `lagxtrt' final_stratum_id1-final_stratum_id13 if `a6_sample', small cl(uid_cluster)
	quietly outreg2 `lagvar' `lagxtrt' using "Results/Appendix/TableA6.out", `a6_mode' nolabel asterisk(se) nocons nonote se addtext(Strata fixed effects, Yes) dec(3) keep(`lagvar' `lagxtrt' treatment)
	local a6_mode append

	ivreg2 opasha_visit_day `lagvar' treatment `lagxtrt' $hw_controls final_stratum_id1-final_stratum_id13 if `a6_sample', small cl(uid_cluster)
	quietly outreg2 `lagvar' `lagxtrt' using "Results/Appendix/TableA6.out", append nolabel asterisk(se) nocons nonote se addtext(Strata fixed effects, Yes, Health worker Controls, Yes) dec(3) keep(`lagvar' `lagxtrt' treatment)

}


****************************************
*** PART-2 *** Analyzing changes in technology presence over time, using observation days
****************************************

** Regressions: Table 8, Panel C

* Time since the experiment started: completed weeks (weeks_from_exp) and fractional weeks (time_from_exp)
gen time_from_exp = visit_date - expstartdate
gen weeks_from_exp = floor(time_from_exp/7)
replace time_from_exp = time_from_exp / 7

* Outcomes and estimation sample: treated centers, first row of each monitoring instance, after the experiment started
local tech_outcomes biom_partday tech_number_present_main tech_number_working_main
local tech_sample "treatment == 1 & n_halfday_inst == 1 & before_exp == 0"

* Sidak-Holm correction

preserve

	* Unadjusted p-value of the time trend for each outcome
	foreach y of local tech_outcomes {
		ivreg2 `y' time_from_exp i.UID_Center $hw_controls if `tech_sample', small cl(uid_cluster)
		test time_from_exp
		gen `y'_p2 = `r(p)'
	}

	* One observation per outcome: "variable" holds the outcome stub and "pval_2" its p-value
	keep biom_partday_p* tech_number_present_main_p* tech_number_working_main_p*
	gen id = 99
	keep in 1
	reshape long biom_partday_p tech_number_present_main_p tech_number_working_main_p, i(id) j(test)
	drop id
	xpose, clear varname
	drop in 1
	rename v1 pval_2
	rename _varname variable

	* Step-down adjustment: the smallest p-value gets exponent _N, the next one _N-1, and so on;
	* adjusted values are then forced to be non-decreasing along the sorted p-values and capped at 1
	sort pval_2
	gen pval_2_k=(_N+1)-_n
	generate pval_2_sidak=1-(1-pval_2)^pval_2_k
	replace  pval_2_sidak=pval_2_sidak[_n-1] if pval_2_sidak[_n-1]>pval_2_sidak in 2/L
	replace  pval_2_sidak=1 if pval_2_sidak>1 & pval_2_sidak~=.
	drop pval_2_k
	order variable pval_2*

	* Keeping each adjusted p-value in a local so that it survives the restore
	foreach y of local tech_outcomes {
		sum pval_2_sidak if variable=="`y'_p"
		local `y'_p2 = string(r(mean))
	}

restore

* Regressions reported in the table; the first column creates the output file and the others are appended to it
local panelc_mode replace

foreach y of local tech_outcomes {

	* Outcome mean over weeks_from_exp <= 4
	su `y' if `tech_sample' & weeks_from_exp <= 4
	local mean_firstmonth=r(mean)

	ivreg2 `y' time_from_exp i.UID_Center $hw_controls if `tech_sample', small cl(uid_cluster)
	quietly outreg2 time_from_exp using "Results/Paper/Table8_PanelC.out", `panelc_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in First Month", `mean_firstmonth', "Sidak-Holm p-val", ``y'_p2') addtext(Center fixed effects, Yes) adec(3) dec(3) keep(time_from_exp)

	local panelc_mode append

}


** Graph: Figure A2

preserve

	* Treated centers, first row of each monitoring instance, weeks 0 to 60 (this removes one outlier observation)
	keep if n_halfday_inst == 1 & treatment == 1 & inrange(weeks_from_exp,0,60)

	* Weekly means of the two technology measures (plus the number of monitoring instances behind each mean)
	collapse (mean) biom_partday tech_number_present_main (count) unique_mon_instance, by(weeks_from_exp treatment)

	* Shares are plotted in percent
	foreach share in biom_partday tech_number_present_main {
		replace `share' = 100*`share'
	}

	twoway ///
		(line biom_partday weeks_from_exp, yaxis(1)) ///
		(line tech_number_present_main weeks_from_exp, yaxis(1) lpattern(dash) lcolor(%75)), ///
		xlabel(0(5)60,labsize(small)) ///
		xtitle("Weeks from beginning of experiment" " ",size(small)) ///
		ylabel(0(10)100, axis(1) labsize(small)) ///
		ytitle("Technology presence (%)" " ",size(small)) ///
		title("Technology presence in Biometric Centers over Time") ///
		legend(position(6) label(1 "Center has biometric equipment") label(2 "Share of all devices (fingerprint reader, laptop, USB key) present") cols(1))

	graph export "Results/Appendix/FigureA2.png", replace

restore


****************************************
*** PART-3 *** Estimating the treatment's impact on health worker effort, using random spot checks
****************************************

** Calling and merging datasets

use "Data/Original/spot_checks.dta", clear

* Every spot check must be found in the health worker roster (the assert stops the run otherwise);
* roster entries without any spot check are discarded
merge m:1 UID_Center Unique_ID using "Data/Original/hw_roster.dta"
assert _merge ~= 1
keep if _merge == 3
drop _merge

* Same rule for the health worker controls
merge m:1 Unique_ID using "Data/Intermediate/hw_controls.dta"
assert _merge ~= 1
keep if _merge == 3
drop _merge

* Main set of health worker controls
global hw_controls = "b_age_couns b_jobs_bef_oa_couns male_couns gen_caste_couns hindu_couns twelveandbelow_couns tertiary_couns hhd_size_couns ownhouse_couns b_age_dum_couns b_jobs_bef_oa_dum_couns male_dum_couns gen_caste_dum_couns hindu_dum_couns twelveandbelow_dum_couns tertiary_dum_couns hhd_size_dum_couns ownhouse_dum_couns"

* Second set of health worker controls
global hw_controls_2 = "b_jobs_bef_oa_couns b_age_couns gen_caste_couns caste_st_couns religion_oth_couns same_nbhd_couns gen_caste_dum_couns caste_st_dum_couns religion_oth_dum_couns b_jobs_bef_oa_dum_couns same_nbhd_dum_couns b_age_dum_couns"

* Date of the spot check; mdy() returns missing if any component is missing or invalid
gen visit_date=mdy(visit_date_mo, visit_date_dd, visit_date_yyyy)

* Indicator for spot checks made before the experiment start date.
* Stata treats missing as larger than any number, so an unguarded "visit_date < expstartdate"
* would code an unknown visit date as 0 (after the start) and an unknown start date as 1.
* before_exp is therefore left missing when either date is missing, so that such rows drop out
* of every "before_exp == 0" sample instead of being silently misclassified.
gen before_exp = visit_date < expstartdate if !missing(visit_date, expstartdate)

** Creating the outcome variables

* Center open or not: codes 1 and 3 of center_open_yn count as open; missing answers stay missing
gen center_open = (center_open_yn == 1 | center_open_yn == 3) if center_open_yn < .

* Health worker present or not: couns_present_yn is used as recorded, so it is only tabulated here
tabulate couns_present_yn, missing


** Regressions: Table 4, columns 2 and 4

* Sidak-Holm correction
* This block only collects the unadjusted p-values of the treatment coefficient and saves them,
* one row per outcome, in Table4_2_sh.dta; the multiple-testing adjustment itself is not computed here.

preserve

* For each outcome: run the main specification on spot checks with before_exp == 0,
* test the treatment coefficient, and store its p-value in a constant variable <outcome>_p2
foreach y in center_open couns_present_yn{
	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 $hw_controls if before_exp == 0, small cl(uid_cluster)   
	test treatment
	gen `y'_p2 = `r(p)'
}	

* The p-value variables are constant across observations, so a single row is enough
keep center_open_p* couns_present_yn_p*
gen id = 99
keep in 1

* reshape moves the trailing "2" into the variable test; xpose then turns each remaining variable
* into a row (v1 = value, _varname = original variable name).
* drop in 1 removes the first transposed row, which should be the one holding test.
reshape long center_open_p couns_present_yn_p, i(id) j(test) 
drop id
xpose, clear varname
drop in 1
rename v1 pval_2
rename _varname variable

save "Data/Intermediate/Table4_2_sh.dta", replace
	
restore

* Main estimates written to Table4_2.out: one column per outcome, on spot checks with before_exp == 0.
* The first outcome starts the output file (replace); the second is appended to it.
local outreg_mode replace

foreach outcome in center_open couns_present_yn {

	* Control-group mean reported underneath the coefficient
	summarize `outcome' if treatment == 0 & before_exp == 0
	local mean_control = r(mean)

	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 $hw_controls if before_exp == 0, small cl(uid_cluster)

	quietly outreg2 treatment using "Results/Paper/Table4_2.out", ///
		`outreg_mode' nolabel asterisk(se) nocons nonote se ///
		addstat("Mean in Control Group", `mean_control') ///
		addtext(Strata fixed effects, Yes, Health worker controls, Yes) adec(3) dec(3) keep(treatment)

	local outreg_mode append
}

* Wild cluster bootstrap or pairs cluster bootstrap: Table C2, columns 2 and 4, and Table D2, columns 2 and 4

preserve

	keep if before_exp == 0

	* Control names with the trailing "couns" removed; the variables are renamed to these
	* shorter names for the bootstrap commands below (undone by restore)
	global hw_controls_b = "b_age_ b_jobs_bef_oa_ male_ gen_caste_ hindu_ twelveandbelow_ tertiary_ hhd_size_ ownhouse_ b_age_dum_ b_jobs_bef_oa_dum_ male_dum_ gen_caste_dum_ hindu_dum_ twelveandbelow_dum_ tertiary_dum_ hhd_size_dum_ ownhouse_dum_"

	foreach short_name of global hw_controls_b {
		rename `short_name'couns `short_name'
	}

	* wild cluster bootstrap (results are read from the log file)
	log using "Results/Appendix/TableC2_3.log", replace
	foreach outcome in center_open couns_present_yn {
		wildbootstrap regress `outcome' treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) reps(5000) rseed(13915183)
	}
	log close

	* pairs cluster bootstrap (results are read from the log file)
	log using "Results/Appendix/TableD2_3.log", replace
	foreach outcome in center_open couns_present_yn {
		clustse regress `outcome' treatment final_stratum_id1-final_stratum_id13 $hw_controls_b, cluster(uid_cluster) method(pairs) reps(5000) seed(13915183)
	}
	log close

restore

* With second set of controls: Table E2, columns 2 and 4
* Same specification as the main spot-check regressions, with $hw_controls_2 as the controls.
* The first outcome starts the output file (replace); the second is appended to it.
local outreg_mode replace

foreach outcome in center_open couns_present_yn {

	* Control-group mean reported underneath the coefficient
	summarize `outcome' if treatment == 0 & before_exp == 0
	local mean_control = r(mean)

	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 $hw_controls_2 if before_exp == 0, small cl(uid_cluster)

	quietly outreg2 treatment using "Results/Appendix/TableE2_2.out", ///
		`outreg_mode' nolabel asterisk(se) nocons nonote se ///
		addstat("Mean in Control Group", `mean_control') ///
		addtext(Strata fixed effects, Yes, Health worker controls, Yes) adec(3) dec(3) keep(treatment)

	local outreg_mode append
}


****************************************
*** PART-4 *** Estimating the treatment's impact on health worker effort and satisfaction, using health worker surveys
****************************************

** Calling and merging datasets

use "Data/Original/hw_surveys.dta", clear

* Attach the health-worker-level roster and the controls, one file at a time.
* Unmatched records are only counted (for the log); nothing is dropped.
foreach hw_file in "Data/Intermediate/hw_roster_by_hw.dta" "Data/Intermediate/hw_controls.dta" {
	merge 1:1 Unique_ID using "`hw_file'"
	count if _merge ~= 3
	drop _merge
}

* Health worker controls used in the regressions of this part
global hw_controls = "b_age_couns b_jobs_bef_oa_couns male_couns gen_caste_couns hindu_couns twelveandbelow_couns tertiary_couns hhd_size_couns ownhouse_couns b_age_dum_couns b_jobs_bef_oa_dum_couns male_dum_couns gen_caste_dum_couns hindu_dum_couns twelveandbelow_dum_couns tertiary_dum_couns hhd_size_dum_couns ownhouse_dum_couns"


** Number of challenges faced in getting a patient to complete treatment course

* Challenge items present under the m_, e_ and b_ prefixes
local completion_items duration_long long_distance no_follow_up multiple_visits_counsel pat_migrate pat_uncoop pvt_care gain_pat_trust chal_low_lit loss_work

* Build the prefixed variable list of each survey wave
foreach wave in m e b {
	local completion_`wave'
	foreach item of local completion_items {
		local completion_`wave' `completion_`wave'' `wave'_`item'
	}
}
* The "other" item (challenges_oth) is only listed for the m_ and b_ waves
local completion_m `completion_m' m_challenges_oth
local completion_b `completion_b' b_challenges_oth

* Turn every item into a 0/1 indicator: codes -111, -333 and -555 become missing,
* any other non-zero answer counts as 1
foreach item of varlist `completion_m' `completion_e' `completion_b' {
	replace `item' = . if inlist(`item', -111, -333, -555)
	replace `item' = 1 if `item' != . & `item' != 0
	assert inlist(`item', 0, 1, .)
}

* Count the challenges mentioned in each wave; the count is missing when no item was answered
local completion_waves    m e b
local completion_suffixes 1 2 _b
forvalues k = 1/3 {
	local wave   : word `k' of `completion_waves'
	local suffix : word `k' of `completion_suffixes'

	egen temp = rownonmiss(`completion_`wave'')
	egen challenges_completion`suffix' = anycount(`completion_`wave''), values(1)
	replace challenges_completion`suffix' = . if temp == 0
	drop temp
}


** Effort towards detection (index)

* Survey prefixes and the suffix given to the variables built from each of them
local det_waves    m e b
local det_suffixes 1 2 _b

* Number of sputum samples collected last week
* A missing count is set to 0 when the wave's "entitled" variable equals 0
mvdecode b_how_many_samp e_num_samp m_num_samp, mv(-111 -555 -888 -999)
local samp_sources m_num_samp e_num_samp b_how_many_samp
forvalues k = 1/3 {
	local w   : word `k' of `det_waves'
	local sfx : word `k' of `det_suffixes'
	local src : word `k' of `samp_sources'
	gen sput_samp`sfx' = cond(`src' == . & `w'_entitled == 0, 0, `src')
}

* Number of days spent detecting new patients last week
mvdecode b_last_visit_field m_last_visit_field e_last_visit_field, mv(-111 -555 -888 -999)
forvalues k = 1/3 {
	local w   : word `k' of `det_waves'
	local sfx : word `k' of `det_suffixes'
	gen days_det`sfx' = `w'_last_visit_field
}

* Number of days spent detecting new patients per week in general
mvdecode b_visit_field m_visit_field e_visit_field, mv(-111 -555 -888 -999)
forvalues k = 1/3 {
	local w   : word `k' of `det_waves'
	local sfx : word `k' of `det_suffixes'
	gen days_det_gen`sfx' = `w'_visit_field
}

* Remembers TB suspects
* Only one code is set to missing per wave: -111 at baseline, -555 at midline and endline
tab1 b_last_tb_suspect_yn m_last_tb_suspect_yn e_last_tb_suspect_yn, m nola
replace b_last_tb_suspect_yn = . if b_last_tb_suspect_yn == -111
replace m_last_tb_suspect_yn = . if m_last_tb_suspect_yn == -555
replace e_last_tb_suspect_yn = . if e_last_tb_suspect_yn == -555
forvalues k = 1/3 {
	local w   : word `k' of `det_waves'
	local sfx : word `k' of `det_suffixes'
	gen remembers_suspect`sfx' = `w'_last_tb_suspect_yn
}

* Number of methods used to detect new patients
* The list of method items differs across survey waves, so each wave has its own list
local methods_m m_visit_neigh m_conduct_rand_visits m_know_people m_wait_suspect m_local_TBHV m_coord_TBHV m_identify_oth1
local methods_e e_visit_neigh e_conduct_rand_visits e_know_people e_wait_suspect e_local_TBHV e_coord_TBHV e_identify_oth1 e_identify_oth2 e_identify_oth3
local methods_b b_visit_neigh b_random_visits b_know_people b_wait_suspect b_local_TBHV b_identify_oth1 b_identify_oth2 b_identify_oth3

* Turn every item into a 0/1 indicator: codes -111, -333 and -555 become missing,
* any other non-zero answer counts as 1
foreach method of varlist `methods_m' `methods_e' {
	replace `method' = . if inlist(`method', -111, -333, -555)
	replace `method' = 1 if `method' != . & `method' != 0
	assert inlist(`method', 0, 1, .)
}
foreach method of varlist `methods_b' {
	replace `method' = . if inlist(`method', -111, -333, -555)
	replace `method' = 1 if `method' != . & `method' != 0
}

* Count the methods mentioned in each wave; the count is missing when no item was answered
local method_waves    m e b
local method_suffixes 1 2 _b
forvalues k = 1/3 {
	local wave   : word `k' of `method_waves'
	local suffix : word `k' of `method_suffixes'

	egen temp = rownonmiss(`methods_`wave'')
	egen num_det_activities`suffix' = anycount(`methods_`wave''), values(1)
	replace num_det_activities`suffix' = . if temp == 0
	drop temp
}

* Standardizing the index components
* Each component is centered and scaled with the mean and standard deviation of the
* control group (treatment == 0); both are also kept as scalars m_<name> and s_<name>
foreach component of varlist sput_samp* days_det* remembers_suspect* num_det_activities* {

	display "`component'"
	summarize `component' if treatment == 0
	scalar m_`component' = r(mean)
	scalar s_`component' = r(sd)
	replace `component' = (`component' - m_`component') / s_`component'

}

* Index = row mean of the standardized components available for the health worker
* (1 is built from the m_ items, 2 from the e_ items, _b from the b_ items)
foreach sfx in 1 2 _b {
	egen index_detection_effort`sfx' = rowmean(sput_samp`sfx' days_det`sfx' days_det_gen`sfx' remembers_suspect`sfx' num_det_activities`sfx')
}


** Excessive workload

// The question "Complaints about job in last 9 months" allowed multiple answers, so one variable was created per answer.
// One of the answers was excessive workload, but nobody picked it at midline, so the corresponding variable does not exist.
// The midline outcome is therefore set to 0 for every health worker who answered the question at all
// (identified through a valid 0/1 answer on m_inconvenient_loc).
gen excess_work1 = 0 if inlist(m_inconvenient_loc, 0, 1)
gen excess_work2 = e_excess_work if inlist(e_excess_work, 0, 1)
gen excess_work_b = b_excess_work if inlist(b_excess_work, 0, 1)


** Job satisfaction (index)

* Removing missing values
mvdecode *_too_long_work *_satisfaction  *_satisfaction_oa *_recommend_oa, mv(-888 -999 -111 -555)

* Survey prefixes and the suffix given to the variables built from each of them
local sat_waves    m e b
local sat_suffixes 1 2 _b

* Satisfied with compensation
* The scale is reversed (1 and 5 swapped, 2 and 4 swapped); other values are left as they are
forvalues k = 1/3 {
	local w   : word `k' of `sat_waves'
	local sfx : word `k' of `sat_suffixes'
	gen satisfaction`sfx' = `w'_satisfaction
}
recode satisfaction1 satisfaction2 satisfaction_b (1=5) (2=4) (4=2) (5=1)

* Overall satisfaction with the job
forvalues k = 1/3 {
	local w   : word `k' of `sat_waves'
	local sfx : word `k' of `sat_suffixes'
	gen satisfaction_oa`sfx' = `w'_satisfaction_oa
}

* Recommended the job to someone else in the past 6 months
forvalues k = 1/3 {
	local w   : word `k' of `sat_waves'
	local sfx : word `k' of `sat_suffixes'
	gen recommend_oa`sfx' = `w'_recommend_oa
}

* Number of complaints
mvdecode b_inconvenient_loc b_inconvenient_timing b_too_long_work b_excess_work b_insuff_compensation b_fear_loss_job b_migration b_no_fix_inc b_complaint_777 b_complaint_oth ///
		 m_inconvenient_loc m_not_interest m_insuff_compensation m_fear_loss_job m_no_fixedinc m_complaint_777 m_complaint_oth ///
		 e_inconvenient_loc e_inconvenient_timing e_too_long_work e_excess_work e_insuff_compensation e_fear_loss_job e_no_fix_income  e_dif_biodevice e_another_job e_complaint_777 e_complaint_oth, mv(-888 -999 -111 -555)

* Complaint items counted in each wave (the *_complaint_777 variables are cleaned above but not counted)
local complaints_b b_inconvenient_loc b_inconvenient_timing b_too_long_work b_excess_work b_insuff_compensation b_fear_loss_job b_migration b_no_fix_inc b_complaint_oth
local complaints_m m_inconvenient_loc m_not_interest m_insuff_compensation m_fear_loss_job m_no_fixedinc m_complaint_oth
local complaints_e e_inconvenient_loc e_inconvenient_timing e_too_long_work e_excess_work e_insuff_compensation e_fear_loss_job e_no_fix_income e_dif_biodevice e_another_job e_complaint_oth

* An item counts as a complaint when it equals 1 or any value from 12 to 29
* (NOTE(review): 12-29 are presumably codes of the "other" item - confirm against the codebook).
* The count is missing when every item of the wave is missing.
local complaint_waves    b m e
local complaint_suffixes _b 1 2
forvalues k = 1/3 {
	local wave    : word `k' of `complaint_waves'
	local suffix  : word `k' of `complaint_suffixes'
	local n_items : word count `complaints_`wave''

	egen temp_`wave' = rowmiss(`complaints_`wave'')
	egen num_complaints`suffix' = anycount(`complaints_`wave''), values(1 12/29)
	replace num_complaints`suffix' = . if temp_`wave' == `n_items'
}

drop temp_?

* Standardizing the index components
* Each component is centered and scaled with the mean and standard deviation of the
* control group (treatment == 0); both are also kept as scalars m_<name> and s_<name>.
* The variables are listed explicitly instead of through wildcards: the pattern "satisfaction*"
* also matches the satisfaction_oa variables, so combining it with "satisfaction_oa*" listed
* them twice and standardized them a second time (a redundant pass that only adds rounding noise).
foreach component in satisfaction satisfaction_oa recommend_oa num_complaints {
	foreach sfx in 1 2 _b {
		local v `component'`sfx'
		su `v' if treatment == 0
		scalar m_`v' = r(mean)
		scalar s_`v' = r(sd)
		* scalar() makes sure the scalars are used even if a variable name (or abbreviation) collides
		replace `v' = (`v' - scalar(m_`v')) / scalar(s_`v')
	}
}

* More complaints mean lower satisfaction, so the sign of this component is flipped
replace num_complaints_b = -1 * num_complaints_b
replace num_complaints1 = -1 * num_complaints1
replace num_complaints2 = -1 * num_complaints2

* Index = row mean of the standardized components available for the health worker
egen index_satisfaction1 = rowmean(satisfaction1 satisfaction_oa1 recommend_oa1 num_complaints1)
egen index_satisfaction2 = rowmean(satisfaction2 satisfaction_oa2 recommend_oa2 num_complaints2)
egen index_satisfaction_b = rowmean(satisfaction_b satisfaction_oa_b recommend_oa_b num_complaints_b)


** Effort towards preventing default

* Baseline

* Steps planned to prevent defaults
tab1 b_visit_regular - b_def_strat_oth, m
mvdecode b_visit_regular - b_def_strat_oth, mv(-888 -999 -111 -555)
replace  b_def_strat_777 = . if b_def_strat_oth == 20 
// The "other" strategy mentioned in code == 20 is "don't know enough about default"

* Number of steps mentioned; missing when none of the items was answered.
* "No item answered" is detected with rownonmiss() == 0 rather than by comparing rowmiss() with a
* hard-coded number of items, because the item list contains a variable range whose length depends
* on the variable order of the dataset.
egen b_answered_prevent_default = rownonmiss(b_visit_regular - b_support_pat b_def_strat_777)
egen b_prevent_default = anycount(b_visit_regular - b_support_pat b_def_strat_777), values(1)
replace b_prevent_default = . if b_answered_prevent_default == 0
drop b_answered_prevent_default

* Steps taken when someone defaults
tab1 b_inform_tbhv_pm - b_def_month_oth b_call_phone b_visit_ask_center b_bring_medicine, m
mvdecode b_inform_tbhv_pm - b_arrange_transfer b_def_month_777 b_call_phone b_visit_ask_center b_bring_medicine, mv(-888 -999 -111 -555)

* Number of steps mentioned; missing when none of the items was answered (same rule as above)
egen b_answered_react_default = rownonmiss(b_inform_tbhv_pm - b_arrange_transfer b_def_month_777 b_call_phone b_visit_ask_center b_bring_medicine)
egen b_react_default = anycount(b_inform_tbhv_pm - b_arrange_transfer b_def_month_777 b_call_phone b_visit_ask_center b_bring_medicine), values(1)
replace b_react_default = . if b_answered_react_default == 0
drop b_answered_react_default

* Any default patient who health worker worked with
tab1 b_person_counsel, m nola
replace b_person_counsel = . if b_person_counsel == -111
replace b_person_counsel = 0 if b_person_counsel == -999 
// Imputing "Does not know" as "0"

* Number of days spent visiting patients per week (last week and in general)
mvdecode b_visit_patient b_last_visit_patient, mv(-111 -555 -888 -999)
gen days_pat_b = b_last_visit_patient
gen days_pat_gen_b = b_visit_patient

* Midline

* Steps planned to prevent defaults
tab1 m_visit_regular - m_def_strat_oth1, m
mvdecode m_visit_regular - m_def_strat_oth1, mv(-888 -999 -111 -555)

* Number of steps mentioned; missing when none of the items was answered.
* "No item answered" is detected with rownonmiss() == 0 rather than by comparing rowmiss() with a
* hard-coded number of items, because the item list contains a variable range whose length depends
* on the variable order of the dataset.
egen m_answered_prevent_default = rownonmiss(m_visit_regular - m_support_pat m_def_strat_777)
egen m_prevent_default = anycount(m_visit_regular - m_support_pat m_def_strat_777), values(1)
replace m_prevent_default = . if m_answered_prevent_default == 0
drop m_answered_prevent_default

* Steps taken when someone defaults
tab1 m_inform_tbhv_pm - m_arrange_transfer  m_call_phone m_visit_ask_center m_bring_medicine, m nola
mvdecode m_inform_tbhv_pm-m_arrange_transfer  m_call_phone m_visit_ask_center m_bring_medicine, mv(-888 -999 -111 -555)

* Number of steps mentioned; missing when none of the items was answered (same rule as above)
egen m_answered_react_default = rownonmiss(m_inform_tbhv_pm-m_arrange_transfer m_call_phone m_visit_ask_center m_bring_medicine)
egen m_react_default = anycount(m_inform_tbhv_pm-m_arrange_transfer m_call_phone m_visit_ask_center m_bring_medicine), values(1)
replace m_react_default = . if m_answered_react_default == 0
drop m_answered_react_default

* Any default patient who health worker worked with
tab1 m_person_counsel, m
replace m_person_counsel = . if inlist(m_person_counsel, -111, -555)
replace m_person_counsel = 0 if m_person_counsel == -999 
// Imputing "Does not know" as "0" 

* Number of days spent visiting patients per week (last week and in general)
mvdecode m_visit_patient m_last_visit_patient, mv(-111 -555 -888 -999)
gen days_pat_m = m_last_visit_patient
gen days_pat_gen_m = m_visit_patient

* Endline

* Steps planned to prevent defaults
tab1 e_visit_regular - e_def_strat_oth, m
mvdecode e_visit_regular - e_def_strat_oth, mv(-888 -999 -111 -555)

* Number of steps mentioned; missing when none of the items was answered.
* "No item answered" is detected with rownonmiss() == 0 rather than by comparing rowmiss() with a
* hard-coded number of items, because the item list contains a variable range whose length depends
* on the variable order of the dataset.
egen e_answered_prevent_default = rownonmiss(e_visit_regular - e_support_pat e_def_strat_777)
egen e_prevent_default = anycount(e_visit_regular - e_support_pat e_def_strat_777), values(1)
replace e_prevent_default = . if e_answered_prevent_default == 0
drop e_answered_prevent_default

* Steps taken when someone defaults
tab1 e_inform_tbhv_pm - e_arrange_transfer e_call_phone e_visit_ask_center e_bring_medicine, m nola
mvdecode e_inform_tbhv_pm - e_arrange_transfer e_call_phone e_visit_ask_center e_bring_medicine, mv(-888 -999 -111 -555)

* Number of steps mentioned; missing when none of the items was answered (same rule as above)
egen e_answered_react_default = rownonmiss(e_inform_tbhv_pm - e_arrange_transfer e_call_phone e_visit_ask_center e_bring_medicine)
egen e_react_default = anycount(e_inform_tbhv_pm - e_arrange_transfer e_call_phone e_visit_ask_center e_bring_medicine), values(1)
replace e_react_default = . if e_answered_react_default == 0
drop e_answered_react_default

* Any default patient who health worker worked with
tab1 e_person_counsel, m
replace e_person_counsel = . if inlist(e_person_counsel, -111, -555)
replace e_person_counsel = 0 if e_person_counsel == -999 
// Imputing "Does not know" as "0" 

* Number of days spent visiting patients per week (last week and in general)
mvdecode e_visit_patient e_last_visit_patient, mv(-111 -555 -888 -999)
gen days_pat_e = e_last_visit_patient
gen days_pat_gen_e = e_visit_patient

* Standardizing the index components
* Each component is centered and scaled with the mean and standard deviation of the
* control group (treatment == 0); both are also kept as scalars mean_<name> and sd_<name>
foreach component of varlist ?_prevent_default ?_react_default ?_person_counsel days_pat_? days_pat_gen_? {

	summarize `component' if treatment == 0
	scalar mean_`component' = r(mean)
	scalar sd_`component' = r(sd)
	replace `component' = (`component' - mean_`component') / sd_`component'

}

* Index = row mean of the standardized components available for the health worker
* (m_ items give index 1, e_ items give index 2, b_ items give the _b version)
local prev_waves    m e b
local prev_suffixes 1 2 _b
forvalues k = 1/3 {
	local w   : word `k' of `prev_waves'
	local sfx : word `k' of `prev_suffixes'
	egen index_default_prev_effort`sfx' = rowmean(`w'_prevent_default `w'_react_default `w'_person_counsel days_pat_`w' days_pat_gen_`w')
}


** Number of challenges faced in detecting patients

* Challenge items, present under each of the b_, m_ and e_ prefixes
local detection_items find_new_pat trav_long_dist pat_follow_up multiple_visits hosp_resp pat_uncooperative pat_denial gaining_trust low_literacy pat_work_hours

foreach wave in b m e {

	* Turn every item of the wave into a 0/1 indicator and collect the prefixed names
	local wave_items
	foreach item of local detection_items {
		local v `wave'_`item'
		replace `v' = . if inlist(`v', -111, -555, -999) // "Missing", "Skip" & "Does not know" (in that order).
		replace `v' = 1 if `v' != . & `v' != 0
		assert inlist(`v', 0, 1, .)
		local wave_items `wave_items' `v'
	}

	* Count the challenges mentioned; the count is missing when no item was answered
	egen temp = rownonmiss(`wave_items')
	egen challenges_detection_`wave' = anycount(`wave_items'), values(1)
	* Consistency check: nobody who reported "no challenges" may have a positive count
	assert challenges_detection_`wave' == 0 if `wave'_no_challenges == 1
	replace challenges_detection_`wave' = . if temp == 0
	drop temp
}

* The m_ and e_ counts get the numeric suffixes 1 and 2
rename challenges_detection_m challenges_detection1
rename challenges_detection_e challenges_detection2


** Number of challenges faced by patients

* Challenge items present under each of the b_, m_ and e_ prefixes
local patient_items lack_adherence travel_diff side_eff pvtcare_short pat_follup no_support

foreach wave in b m e {

	* The difficulty_oth item is only used for the b_ and m_ waves
	local wave_stubs `patient_items'
	if "`wave'" != "e" local wave_stubs `wave_stubs' difficulty_oth

	* Turn every item of the wave into a 0/1 indicator and collect the prefixed names
	local wave_items
	foreach item of local wave_stubs {
		local v `wave'_`item'
		replace `v' = . if inlist(`v', -111, -555, -999) // "Missing", "Skip" & "Does not know" (in that order).
		replace `v' = 1 if `v' != . & `v' != 0
		assert inlist(`v', 0, 1, .)
		local wave_items `wave_items' `v'
	}

	* Count the challenges mentioned; the count is missing when no item was answered
	egen temp = rownonmiss(`wave_items')
	egen challenges_patients_`wave' = anycount(`wave_items'), values(1)
	replace challenges_patients_`wave' = . if temp == 0
	drop temp
}

* The m_ and e_ counts get the numeric suffixes 1 and 2
rename (challenges_patients_m challenges_patients_e) (challenges_patients1 challenges_patients2)


** Impression of biometric devices

tab1 m_impbio_now e_impbio_now e_bio_scaleup m_bio_scaleup, m nola
mvdecode m_impbio_now e_impbio_now e_bio_scaleup m_bio_scaleup, mv(-111 -555 -888 -999)

* Suffix 1 is built from the m_ answers, suffix 2 from the e_ answers
local biom_waves m e

* Impression of the device, reversed as 5 minus the recorded answer
forvalues k = 1/2 {
	local w : word `k' of `biom_waves'
	gen imp_biom`k' = 5 - `w'_impbio_now
}

* Whether the use of biometrics should be scaled up
forvalues k = 1/2 {
	local w : word `k' of `biom_waves'
	gen biom_scaleup`k' = `w'_bio_scaleup
}

* Descriptive statistics on impression of biometric devices: reported in Section 6

preserve

	keep Unique_ID treatment imp_biom1 imp_biom2 biom_scaleup1 biom_scaleup2 Couns_Type

	* One row per health worker and survey round, so that both rounds are tabulated together
	reshape long imp_biom biom_scaleup, i(Unique_ID) j(survey_round)

	label variable imp_biom "What is your impression of the biometric device now?"
	label variable biom_scaleup "Do you think Operation ASHA should scale up the use of biometrics?"

	// 4: very useful; 3: useful; 2: somewhat useful; 1: not useful; 0: causes only negative impact
	tab imp_biom if treatment == 1
	tab biom_scaleup if treatment == 1

restore


** Reshaping to create long versions of endline and midline variables
* After this step there is one row per health worker (Unique_ID) and survey round:
* survey == 1 holds the variables built from the m_ items, survey == 2 those built from the e_ items.
* Variables ending in _b do not carry a numeric suffix, so they are not reshaped and remain
* available as columns next to each outcome.

reshape long challenges_completion challenges_detection challenges_patients ///
			index_detection_effort excess_work index_satisfaction index_default_prev_effort ///
			imp_biom biom_scaleup, ///
			i(Unique_ID) j(survey)

			
** Regressions: Table A8, columns 2 and 3, Table 8, Panel A, columns 3, 4, 5, and 6, and Table 8, Panel B, column 3

* Sidak-Holm correction
* This block only collects the unadjusted p-values of the treatment coefficient and saves them,
* one row per outcome, in Table8_PanelA_2_sh.dta; the multiple-testing adjustment itself is not computed here.

preserve

* For each outcome: run the main specification (controlling for the _b version of the outcome),
* test the treatment coefficient, and store its p-value in a constant variable <outcome>_p2
foreach y in challenges_completion challenges_detection excess_work index_satisfaction {
	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 `y'_b $hw_controls, small cl(uid_cluster)   
	test treatment
	gen `y'_p2 = `r(p)'
}	

* The p-value variables are constant across observations, so a single row is enough
keep challenges_completion_p* challenges_detection_p* excess_work_p* index_satisfaction_p*
gen id = 99
keep in 1

* reshape moves the trailing "2" into the variable test; xpose then turns each remaining variable
* into a row (v1 = value, _varname = original variable name).
* drop in 1 removes the first transposed row, which should be the one holding test.
reshape long challenges_completion_p challenges_detection_p excess_work_p index_satisfaction_p, i(id) j(test) 
drop id
xpose, clear varname
drop in 1
rename v1 pval_2
rename _varname variable

save "Data/Intermediate/Table8_PanelA_2_sh.dta", replace
	
restore

* Unadjusted p-value of the treatment coefficient for challenges_patients, saved in
* Table8_PanelB_2_sh.dta with the same layout as the Panel A file (one row per outcome)
preserve

* Main specification (controlling for the _b version of the outcome); the p-value of the
* treatment coefficient is stored in a constant variable <outcome>_p2
foreach y in challenges_patients{
	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 `y'_b $hw_controls, small cl(uid_cluster)   
	test treatment
	gen `y'_p2 = `r(p)'
}	

* The p-value variable is constant across observations, so a single row is enough
keep challenges_patients_p*
gen id = 99
keep in 1

* reshape moves the trailing "2" into the variable test; xpose then turns each remaining variable
* into a row (v1 = value, _varname = original variable name).
* drop in 1 removes the first transposed row, which should be the one holding test.
reshape long challenges_patients_p, i(id) j(test) 
drop id
xpose, clear varname
drop in 1
rename v1 pval_2
rename _varname variable

save "Data/Intermediate/Table8_PanelB_2_sh.dta", replace
	
restore

* Treatment effects on the health worker survey outcomes, all written to Table8_PanelA_2.out.
* Every regression controls for the _b version of its outcome, the strata dummies and $hw_controls.
* The first outcome starts the output file (replace); the others are appended in the order listed.
local outreg_mode replace

foreach outcome in index_default_prev_effort index_detection_effort challenges_completion challenges_detection excess_work index_satisfaction challenges_patients {

	* Control-group mean reported underneath the coefficient
	summarize `outcome' if treatment == 0
	local mean_control = r(mean)

	ivreg2 `outcome' treatment final_stratum_id1-final_stratum_id13 `outcome'_b $hw_controls, small cl(uid_cluster)

	quietly outreg2 treatment using "Results/Paper/Table8_PanelA_2.out", ///
		`outreg_mode' nolabel asterisk(se) nocons nonote se ///
		addstat("Mean in Control Group", `mean_control') ///
		addtext(Strata fixed effects, Yes, Baseline control, Yes, Health worker controls, Yes) adec(3) dec(3) keep(treatment)

	local outreg_mode append
}


****************************************
*** PART-5 *** Estimating the treatment's impact on health worker effort and patient satisfaction, using patient surveys
****************************************

** Calling and merging datasets

use "Data/Original/patient_surveys_attempts.dta", clear

* Survey answers (one record per questionnaire code and entry/exit survey) and patient controls
* (one record per questionnaire code); all records are kept, whether matched or not
merge 1:1 q1_ques_code a02_entry_exit using "Data/Original/patient_surveys.dta"
drop _merge

merge m:1 q1_ques_code using "Data/Intermediate/patient_controls.dta"
drop _merge

* Health worker roster: only records found on both sides are kept
merge m:1 Unique_ID UID_Center using "Data/Original/hw_roster.dta"
drop if _merge != 3
drop _merge

* Verified patients file: only records found on both sides are kept
merge 1:1 q1_ques_code a02_entry_exit using "Data/Intermediate/verified_patients.dta"
drop if _merge != 3
drop _merge

* Patient controls used in the regressions of this part
global patient_controls = "pat_male pat_age pat_caste_general pat_hindu pat_rw_both pat_ownhouse pat_hhd_size pat_migrate_always pat_migrate_6plus pat_time_to_center pat_dum_male pat_dum_age pat_dum_caste_general pat_dum_hindu pat_dum_rw_both pat_dum_ownhouse pat_dum_hhd_size pat_dum_migrate_always pat_dum_migrate_6plus pat_dum_time_to_center"


** Estimating the treatment's impact on the support received from Operation ASHA

* Creating outcome variables

tab k2_often_see_asha k2_often_see_asha_unit, m
tab k2_often_see_asha k2_often_see_asha_unit_oth, m

* Frequency of interaction with Operation ASHA
// Making uniform units, based on per-week frequency, since it is the most popular unit.
// Unit codes: 1 = "Daily", 2 = "Weekly", 3 = "Monthly", 4 = "Never"; any other unit is left missing.
// Only ~3% patients report "other" code in units of frequency.
// When "other" units are used, in 87% of the cases it is "Until now only once." Also, for the patients who use this unit, many report the value as more than 1. This is contradictory, and in any case, hard to translate into "per week" frequency. Hence, ignoring those patients.
gen interact_freq = cond(k2_often_see_asha_unit == 1, k2_often_see_asha * 7, ///
					cond(k2_often_see_asha_unit == 2, k2_often_see_asha, ///
					cond(k2_often_see_asha_unit == 3, k2_often_see_asha * (7/30), ///
					cond(k2_often_see_asha_unit == 4, 0, .))))
label var interact_freq "Frequency of interaction with health workers"

* Operation ASHA support: 1 for answers 1 or 2, 0 for any other answer; missing and -999 stay missing
gen oa_support_during_trt = inrange(k11_oa_support_treat,1,2) if k11_oa_support_treat != . & k11_oa_support_treat != -999

* Home delivery of pills: 1 for answers 1 or 2, 0 for any other answer; missing and -999 stay missing
gen hv_pill_delivery = inrange(k17_cdp_medici_home,1,2) if k17_cdp_medici_home != . & k17_cdp_medici_home != -999

* Health worker gives advice related to TB: 1 for answers 1 or 2, 0 for answer 3, missing otherwise
gen gives_tb_advice = inlist(k9_asha_advice_tb, 1, 2) if inlist(k9_asha_advice_tb, 1, 2, 3)
label var gives_tb_advice "Health worker gives advice related to TB"

* Defining sample: all verified patients, and using their last survey

gen sample_temp = last_complete == 1 & verified_pat == 1

* Regressions: Table A8, columns 4, 5, 6, and 7

su gives_tb_advice if treatment == 0 & sample_temp == 1
local mean_control=r(mean)
ivreg2 gives_tb_advice treatment final_stratum_id1-final_stratum_id13 post_exp entry_survey $patient_controls if sample_temp == 1, small cl(uid_cluster)   
quietly outreg2 treatment using "Results/Appendix/TableA8.out", replace nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Patient controls, Yes) adec(3) dec(3) keep(treatment)

foreach var in interact_freq oa_support_during_trt hv_pill_delivery{

su `var' if treatment == 0 & sample_temp == 1
local mean_control=r(mean)
ivreg2 `var' treatment final_stratum_id1-final_stratum_id13 post_exp entry_survey $patient_controls if sample_temp == 1, small cl(uid_cluster)   
quietly outreg2 treatment using "Results/Appendix/TableA8.out", append nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Patient controls, Yes) adec(3) dec(3) keep(treatment)

}


** Estimating the treatment's impact on satisfaction with Operation ASHA

// Redefine the estimation sample: verified patients in survey rounds coded 2-5.
drop sample_temp
gen sample_temp = inlist(a02_entry_exit, 2, 3, 4, 5) & verified_pat == 1

* Creating outcome variables

* Satisfaction with Operation ASHA

// Keep strictly positive, non-missing answers, then collapse to a binary:
// codes 1-2 become 1, codes 3-4 become 0.
gen satisfaction_oa = k20_satisfi_interact_oa if k20_satisfi_interact_oa > 0 & k20_satisfi_interact_oa < .
recode satisfaction_oa (1/2 = 1) (3/4 = 0)

* Recommends Operation ASHA to other TB patients

// Start from the raw referral code whenever either the coded answer is usable
// (not . and not -999) or the "other" field is filled in. Codes -777 and 2-7
// are set to 0; a -777 answer whose "other" code is 10 is then set to 1.
gen recommend_oa = e14_tb_who_refer if !inlist(e14_tb_who_refer,.,-999) | e14_tb_who_refer_oth != .
recode recommend_oa (-777 2/7 = 0)
replace recommend_oa = 1 if e14_tb_who_refer_oth == 10 & e14_tb_who_refer == -777

* Sidak-Holm correction

// Collect the unadjusted p-value on treatment for each outcome and save them
// as a small (variable, pval_2) dataset for the correction step run later.
preserve

foreach y in satisfaction_oa recommend_oa {
	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 post_exp $patient_controls if sample_temp == 1, small cluster(uid_cluster)
	test treatment
	// Constant column holding this outcome's p-value.
	gen `y'_p2 = `r(p)'
}

// Reduce to a single row of p-values, then turn it into one row per outcome.
keep satisfaction_oa_p* recommend_oa_p*
gen id = 99
keep in 1
reshape long satisfaction_oa_p recommend_oa_p, i(id) j(test)
drop id
xpose, clear varname
// The first transposed row is the reshape index "test", not a p-value.
drop in 1
rename v1 pval_2
rename _varname variable

save "Data/Intermediate/Table8_PanelB_1_sh.dta", replace

restore

* Regressions: Table 8, Panel B, columns 1 and 2

local outreg_mode replace
foreach y in satisfaction_oa recommend_oa {

	summarize `y' if treatment == 0 & sample_temp == 1
	local mean_control = r(mean)

	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 post_exp $patient_controls if sample_temp == 1, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Paper/Table8_PanelB.out", `outreg_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Patient controls, Yes) adec(3) dec(3) keep(treatment)

	local outreg_mode append
}

* Restricting the sample to patients detected before experiment start date: Table B3

// Same specification on the post_exp == 0 subsample; post_exp is therefore
// dropped from the regressors.
local outreg_mode replace
foreach y in satisfaction_oa recommend_oa {

	summarize `y' if treatment == 0 & sample_temp == 1 & post_exp == 0
	local mean_control = r(mean)

	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 $patient_controls if sample_temp == 1 & post_exp == 0, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Appendix/TableB3.out", `outreg_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Patient controls, Yes) adec(3) dec(3) keep(treatment)

	local outreg_mode append
}


****************************************
*** PART-6 *** Estimating the treatment's impact on health worker compensation
****************************************

** Calling and merging datasets
// Salary slips (several rows per health worker, hence m:1) are combined with
// the health-worker-level roster and baseline controls. No rows are dropped;
// the count after each merge reports how many rows did not match.

use "Data/Original/salary_slips.dta", clear

merge m:1 Unique_ID using "Data/Intermediate/hw_roster_by_hw.dta"
count if _merge ~= 3
drop _merge

merge m:1 Unique_ID using "Data/Intermediate/hw_controls.dta"
count if _merge ~= 3
drop _merge

// Control variables used in the health-worker regressions below.
// Defined by copy (no "=") so the list is stored verbatim: with "=", the
// quoted list is evaluated as a string expression, which older Stata
// releases truncate silently for strings of this length.
global hw_controls "b_age_couns b_jobs_bef_oa_couns male_couns gen_caste_couns hindu_couns twelveandbelow_couns tertiary_couns hhd_size_couns ownhouse_couns b_age_dum_couns b_jobs_bef_oa_dum_couns male_dum_couns gen_caste_dum_couns hindu_dum_couns twelveandbelow_dum_couns tertiary_dum_couns hhd_size_dum_couns ownhouse_dum_couns"


** Creating the outcome variables

* Total salary
label var TotalSalary "Monthly Salary (Op ASHA reported)"

* Enrollment Incentives
// Sum of the two enrollment-incentive columns. The second column is added
// only where it is not the system missing value.
// NOTE(review): where Incentive_Enrollment itself is missing the sum stays
// missing even if Incentive_Enrollment_new is filled in - confirm intended.
gen Detection_Incentives_Payment = Incentive_Enrollment
replace Detection_Incentives_Payment = Detection_Incentives_Payment + Incentive_Enrollment_new if Incentive_Enrollment_new != .


** Descriptive statistics: reported in Section 2.2

// City identifier: state id and city id concatenated as text.
gen city_code = string(state_id) + string(city_id)

* Default incentives

tab Unique_ID Incentive_Default if inlist(city_code,"1622","1623"), m
// Default Incentives are only paid to 3 health workers. They are paid for all the months for which these health workers are in our experiment
// 162341 (control group): 14 months. In Korba
// 162243 (control group): 3 months. In Durg/Bhilai
// 162342 (treatment group): 1 month. In Korba

* Detection incentives

tab Incentive_Enrollment city_code, m
tab Incentive_Enrollment_new city_code, m
// Enrollment incentives are paid in 6 out of 9 cities.
// 3 cities without these incentives: Korba, Bhubaneswar, Durg/Bhilai

// Detection incentives relative to total salary. Despite the variable name,
// this is a ratio and is not multiplied by 100.
gen detect_incent_percent = Detection_Incentives_Payment/TotalSalary if Detection_Incentives_Payment < .
summarize detect_incent_percent


** Regressions: Table 8, Panel A, columns 1 and 2

* Sidak-Holm correction

// Collect the unadjusted p-value on treatment for each compensation outcome
// and save them as a small (variable, pval_2) dataset for the correction
// step run later.
preserve

foreach y in Detection_Incentives_Payment TotalSalary {
	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 $hw_controls, small cluster(uid_cluster)
	test treatment
	// Constant column holding this outcome's p-value.
	gen `y'_p2 = `r(p)'
}

// Reduce to a single row of p-values, then turn it into one row per outcome.
keep Detection_Incentives_Payment_p* TotalSalary_p*
gen id = 99
keep in 1

reshape long Detection_Incentives_Payment_p TotalSalary_p, i(id) j(test)
drop id
xpose, clear varname
// The first transposed row is the reshape index "test", not a p-value.
drop in 1
rename v1 pval_2
rename _varname variable

save "Data/Intermediate/Table8_PanelA_1_sh.dta", replace

restore

// Reported regressions: the first outcome starts a fresh output file, the
// second is appended to it.
local outreg_mode replace
foreach y in Detection_Incentives_Payment TotalSalary {

	// Control-group mean of the outcome, reported alongside the estimate.
	summarize `y' if treatment == 0
	local mean_control = r(mean)

	ivreg2 `y' treatment final_stratum_id1-final_stratum_id13 $hw_controls, small cluster(uid_cluster)
	quietly outreg2 treatment using "Results/Paper/Table8_PanelA_1.out", `outreg_mode' nolabel asterisk(se) nocons nonote se addstat("Mean in Control Group", `mean_control') addtext(Strata fixed effects, Yes, Health worker controls, Yes) dec(1) keep(treatment)

	local outreg_mode append
}


** Sidak-Holm correction
// For each table, the p-value files saved earlier are stacked and a
// step-down Sidak adjustment is applied across the stacked tests. The
// adjusted values are listed so that they appear in the log; previously
// they were computed and then discarded by the next "use ..., clear".

capture program drop sidak_holm_adjust
program define sidak_holm_adjust
	// Usage: sidak_holm_adjust pvar
	// Sorts the data in memory by pvar (ascending) and creates pvar_sidak:
	//   1 - (1 - p)^k, with k = number of tests for the smallest p-value,
	//   decreasing by one for each subsequent p-value,
	// then makes the adjusted values non-decreasing in sort order and caps
	// them at 1. Missing p-values yield missing adjusted values.
	syntax varname(numeric)
	local p `varlist'

	sort `p'
	tempvar k
	gen `k' = (_N + 1) - _n
	generate `p'_sidak = 1 - (1 - `p')^`k'
	// Carry forward the running maximum. "in 2/L" is an error on a dataset
	// with a single observation, so skip the step in that case.
	if _N > 1 {
		replace `p'_sidak = `p'_sidak[_n-1] if `p'_sidak[_n-1] > `p'_sidak in 2/L
	}
	replace `p'_sidak = 1 if `p'_sidak > 1 & `p'_sidak ~= .
end

* For Table 4

use "Data/Intermediate/Table4_1_sh.dta", clear
append using "Data/Intermediate/Table4_2_sh.dta"

sidak_holm_adjust pval_2
order variable pval_2*
list variable pval_2 pval_2_sidak, noobs

* For Table 8, Panel A

use "Data/Intermediate/Table8_PanelA_2_sh.dta", clear
append using "Data/Intermediate/Table8_PanelA_1_sh.dta"

sidak_holm_adjust pval_2
order variable pval_2*
list variable pval_2 pval_2_sidak, noobs

* For Table 8, Panel B

use "Data/Intermediate/Table8_PanelB_1_sh.dta", clear
append using "Data/Intermediate/Table8_PanelB_2_sh.dta"

sidak_holm_adjust pval_2
order variable pval_2*
list variable pval_2 pval_2_sidak, noobs